rootdir="/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist"
setwd(rootdir) # Set this to correct location
source("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/project/R/seurat_processing.R")
source("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/project/R/DE-analysis.R")
source("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/project/R/visualisation_tools_and_pathway_analysis.R")
merged_gene<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_gene.rds")
merged_iso<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_isoform.rds")
How many genes and transcripts overlap in our different clusters?
plot_venn(merged_gene,"gene")
plot_venn(merged_iso,"isoform")
We’ll run the DE testing on the unintegrated assay.
DE_samples_iso<-DE_samples(merged_iso,
output_dir=paste0(rootdir,"/analyses"),
"", #to specify what we are working with in the output file name
topn=15)
markers_sample_iso<-DE_samples_iso$markers
head(markers_sample_iso)
## p_val avg_log2FC pct.1
## ENST00000309268.11--EEF1A1 0 18.20146 1.000
## ENSG00000196262.15-44796680-44801631-1--PPIA 0 17.34552 0.999
## ENSG00000118181.11-119015712-119018582-1--RPS25 0 16.58348 0.998
## ENSG00000281181.1-8437202-8437688-1--ENSG00000281181 0 16.06028 0.975
## ENSG00000186468.13-82276060-82278348-1--RPS23 0 15.70314 0.968
## ENSG00000169567.13-131159292-131165106-2--HINT1 0 15.50109 0.954
## pct.2 p_val_adj cluster
## ENST00000309268.11--EEF1A1 0 0 ctrl
## ENSG00000196262.15-44796680-44801631-1--PPIA 0 0 ctrl
## ENSG00000118181.11-119015712-119018582-1--RPS25 0 0 ctrl
## ENSG00000281181.1-8437202-8437688-1--ENSG00000281181 0 0 ctrl
## ENSG00000186468.13-82276060-82278348-1--RPS23 0 0 ctrl
## ENSG00000169567.13-131159292-131165106-2--HINT1 0 0 ctrl
## gene
## ENST00000309268.11--EEF1A1 ENST00000309268.11--EEF1A1
## ENSG00000196262.15-44796680-44801631-1--PPIA ENSG00000196262.15-44796680-44801631-1--PPIA
## ENSG00000118181.11-119015712-119018582-1--RPS25 ENSG00000118181.11-119015712-119018582-1--RPS25
## ENSG00000281181.1-8437202-8437688-1--ENSG00000281181 ENSG00000281181.1-8437202-8437688-1--ENSG00000281181
## ENSG00000186468.13-82276060-82278348-1--RPS23 ENSG00000186468.13-82276060-82278348-1--RPS23
## ENSG00000169567.13-131159292-131165106-2--HINT1 ENSG00000169567.13-131159292-131165106-2--HINT1
Let’s have a look at the genes that have differentially expressed transcripts between the samples:
lapply(c("ctrl","S24","R"), function(x) {
gene_list<-markers_sample_iso |>
filter(avg_log2FC>2 & cluster==x) |>
mutate(
gene_name = sub(".*--", "",gene)) |>pull(gene_name)
run_enrichR(gene_list,x)
})
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
## [[1]]
##
## [[2]]
##
## [[3]]
The genes involved in these pathway,etc… express different isoforms
.
Same analysis on the gene-level:
DE_samples_gene<-DE_samples(merged_gene,
output_dir=paste0(rootdir,"/analyses"),
"3samples_gene", #to specify what we are working with in the output file name
topn=10,
type="gene")
markers_sample_gene<-DE_samples_gene$markers
head(markers_sample_gene)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## RPS28P7 0 -12.5357112 0.000 0.580 0 ctrl RPS28P7
## ENSG00000280800 0 -13.7771634 0.000 0.508 0 ctrl ENSG00000280800
## MIF 0 -14.4798635 0.000 0.470 0 ctrl MIF
## UCHL1 0 0.8881663 0.989 0.525 0 ctrl UCHL1
## KRT18 0 -10.8232339 0.002 0.465 0 ctrl KRT18
## KRT19 0 -14.0837799 0.000 0.455 0 ctrl KRT19
Let’s have a look at the genes that are differentially expressed between the samples:
lapply(c("ctrl","S24","R"), function(x) {
gene_list<-markers_sample_gene |>
filter(avg_log2FC>2 & cluster==x) |>pull(gene)
run_enrichR(gene_list,x)
})
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
## [[1]]
##
## [[2]]
##
## [[3]]
We will now use the cluster labels obtained after rpca integration and run DE analysis across these cluster to identify some cellular clones for example. We will also run pathway enrchment analysis on these DE’ed genes and transcripts.
dir.create(paste0(rootdir,"/analyses/DE_clusters"))
merged_gene<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_gene_integrated.rds")
merged_iso<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_isoform_integrated.rds")
res_de_iso<-DE_clusters(merged_iso, # an integrated seurat object
outdir=paste0(rootdir,"/analyses/DE_clusters"),
topn=10,
type="isoform")
res_de_gene<-DE_clusters(merged_gene, # an integrated seurat object
outdir=paste0(rootdir,"/analyses/DE_clusters"),
topn=10,
type="gene")
## Focus on individual clusters At transcript-level, clusters 1 and 4
seem to stand out more. At gene-level we might want to focus on clusters
1 that stays present across samples and cluster 2 that seem to appear in
the resistant cells. The following function extracts a list of DE’ed
markers in a specified cluster,returns the list and plots Vln Plots for
these markers
list_gene1<-extract_cluster_feature(num_cluster=1,
marker_table_list=res_de_gene$markers, # the $markers output from DE_cluster()
type="gene",
cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
integrated=TRUE,
seu_obj=merged_gene # used in DE_cluster()
)
list_gene2<-extract_cluster_feature(num_cluster=2,
marker_table_list=res_de_gene$markers, # the $markers output from DE_cluster()
type="gene",
cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
integrated=TRUE,
seu_obj=merged_gene # used in DE_cluster()
)
Some of these genes are indeed specific to a cluster but not all of them.
Do they have DE’ed transcripts in the 3 samples? For examples with the 10 first genes of cluster 1
num_cluster=1
type="gene"
setwd(paste0(rootdir,"/analyses/cluster",num_cluster,"_",type))
lapply(list_gene1[1:10], function(x) {
plot_feature_iso(merged_iso,
x,
sample_name = "",
output_dir = "./")
})
## [1] "plotted in .//featureplot_H4C3.png"
## [1] "plotted in .//featureplot_H3C4.png"
## [1] "plotted in .//featureplot_H3C15.png"
## [1] "plotted in .//featureplot_H1-4.png"
## [1] "plotted in .//featureplot_H1-3.png"
## [1] "plotted in .//featureplot_H1-5.png"
## [1] "plotted in .//featureplot_H4C4.png"
## [1] "plotted in .//featureplot_H3C2.png"
## [1] "plotted in .//featureplot_H2AC17.png"
## [1] "plotted in .//featureplot_CDK1.png"
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
Not really…
Can identify the biological function of cluster 1 with a pathway enrichment analysis?
#gene_list<-sub(".*--", "",)
run_enrichR(list_gene1[1:10],"cluster1 gene-level")
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.
Let’s now focus on DE’ed transcripts in clusters 1 and 4.
list1<-extract_cluster_feature(num_cluster=1,
marker_table_list=res_de_iso$markers, # the $markers output from DE_cluster()
type="isoform",
cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
integrated=TRUE,
seu_obj=merged_iso # used in DE_cluster()
)
list1
## [1] "ENST00000377803.4--H4C3"
## [2] "ENST00000356476.3--H3C4"
## [3] "ENST00000304218.6--H1-4"
## [4] "ENST00000403683.2--H3C15"
## [5] "ENSG00000175063.17-45814410-45816957-1--UBE2C"
## [6] "ENST00000244534.7--H1-3"
## [7] "ENST00000331442.5--H1-5"
## [8] "ENST00000614247.2--H4C4"
## [9] "ENST00000359611.4--H2AC17"
## [10] "ENST00000621411.3--H3C2"
## [11] "ENST00000699425.1--UBE2T"
## [12] "ENSG00000153048.11-8854921-8869006-1--CARHSP1"
## [13] "ENSG00000175063.17-45814410-45816952-1--UBE2C"
## [14] "ENST00000343677.4--H1-2"
## [15] "ENSG00000077152.12-202331657-202335014-1--UBE2T"
## [16] "ENST00000333151.5--H2AC14"
## [17] "ENST00000646651.1--UBE2T"
list4<-extract_cluster_feature(num_cluster=4,
marker_table_list=res_de_iso$markers, # the $markers output from DE_cluster()
type="isoform",
cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
integrated=TRUE,
seu_obj=merged_iso # used in DE_cluster()
)
Are these transcripts DE’ed in the samples?
setwd(paste0(rootdir,"/analyses/cluster",1,"_isoform"))
lapply(list1,function(x) {
plot1<-FeaturePlot(merged_iso, features = x,combine=TRUE,split.by="sample") +theme(strip.text = element_text(size = 1)) #+labs(title = paste0("Differential transcript expression per sample for ",gene))
ggsave(filename = paste0("featureplot",x,".png") ,
plot = plot1, width=8,height=5,limitsize = FALSE)
print(paste0("plotted in ",paste0("featureplot",x,".png") ))
return(plot1)
})
## [1] "plotted in featureplotENST00000377803.4--H4C3.png"
## [1] "plotted in featureplotENST00000356476.3--H3C4.png"
## [1] "plotted in featureplotENST00000304218.6--H1-4.png"
## [1] "plotted in featureplotENST00000403683.2--H3C15.png"
## [1] "plotted in featureplotENSG00000175063.17-45814410-45816957-1--UBE2C.png"
## [1] "plotted in featureplotENST00000244534.7--H1-3.png"
## [1] "plotted in featureplotENST00000331442.5--H1-5.png"
## [1] "plotted in featureplotENST00000614247.2--H4C4.png"
## [1] "plotted in featureplotENST00000359611.4--H2AC17.png"
## [1] "plotted in featureplotENST00000621411.3--H3C2.png"
## [1] "plotted in featureplotENST00000699425.1--UBE2T.png"
## [1] "plotted in featureplotENSG00000153048.11-8854921-8869006-1--CARHSP1.png"
## [1] "plotted in featureplotENSG00000175063.17-45814410-45816952-1--UBE2C.png"
## [1] "plotted in featureplotENST00000343677.4--H1-2.png"
## [1] "plotted in featureplotENSG00000077152.12-202331657-202335014-1--UBE2T.png"
## [1] "plotted in featureplotENST00000333151.5--H2AC14.png"
## [1] "plotted in featureplotENST00000646651.1--UBE2T.png"
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
Some of the transcripts seem to be specific to a sample.
Again, let’s try to identify the biological function of cluster 1 using pathway analysis:
gene_list1<-sub(".*--", "",list1)
run_enrichR(gene_list1,"cluster1 transcript-level")
## Uploading data to Enrichr... Done.
## Querying GO_Molecular_Function_2025... Done.
## Querying GO_Cellular_Component_2025... Done.
## Querying GO_Biological_Process_2025... Done.
## Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.